/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */ /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */ package net.nutch.searcher; import java.io.IOException; import java.io.File; import java.util.Enumeration; import java.util.ArrayList; import org.apache.lucene.search.Searchable; //import org.apache.lucene.search.Searcher; //import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MultiSearcher; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import net.nutch.io.*; import net.nutch.db.*; import net.nutch.fetcher.*; import net.nutch.linkdb.*; import net.nutch.indexer.*; import net.nutch.analysis.NutchDocumentAnalyzer; /** Implements {@link Searcher} and {@link HitDetailer} for either a single * merged index, or for a set of individual segment indexes. */ public class IndexSearcher implements Searcher, HitDetailer { private org.apache.lucene.search.Searcher luceneSearcher; private String[] segmentNames; // for back compat. /** Construct given a number of indexed segments. */ public IndexSearcher(File[] segmentDirs) throws IOException { NutchSimilarity sim = new NutchSimilarity(); Searchable[] searchables = new Searchable[segmentDirs.length]; segmentNames = new String[segmentDirs.length]; for (int i = 0; i < segmentDirs.length; i++) { org.apache.lucene.search.Searcher searcher = new org.apache.lucene.search.IndexSearcher (new File(segmentDirs[i], "index").toString()); searcher.setSimilarity(sim); searchables[i] = searcher; segmentNames[i] = segmentDirs[i].getName(); } this.luceneSearcher = new MultiSearcher(searchables); this.luceneSearcher.setSimilarity(sim); } /** Construct given a directory containing fetched segments, and a separate * directory naming their merged index. */ public IndexSearcher(String index) throws IOException { this.luceneSearcher = new org.apache.lucene.search.IndexSearcher(index); this.luceneSearcher.setSimilarity(new NutchSimilarity()); } public Hits search(Query query, int numHits) throws IOException { org.apache.lucene.search.Query luceneQuery = QueryTranslator.translate(query); return translateHits(luceneSearcher.search(luceneQuery, null, numHits)); } public String getExplanation(Query query, Hit hit) throws IOException { return luceneSearcher.explain(QueryTranslator.translate(query), hit.getIndexDocNo()).toHtml(); } public HitDetails getDetails(Hit hit) throws IOException { ArrayList fields = new ArrayList(); ArrayList values = new ArrayList(); Document doc = luceneSearcher.doc(hit.getIndexDocNo()); Enumeration e = doc.fields(); while (e.hasMoreElements()) { Field field = (Field)e.nextElement(); fields.add(field.name()); values.add(field.stringValue()); } // for back-compatibility with old indexes String segment = doc.get("segment"); if (segment == null) { MultiSearcher multi = (MultiSearcher)luceneSearcher; fields.add("segment"); values.add(segmentNames[multi.subSearcher(hit.getIndexDocNo())]); fields.add("docNo"); values.add(Integer.toString(multi.subDoc(hit.getIndexDocNo()), 16)); } return new HitDetails((String[])fields.toArray(new String[fields.size()]), (String[])values.toArray(new String[values.size()])); } public HitDetails[] getDetails(Hit[] hits) throws IOException { HitDetails[] results = new HitDetails[hits.length]; for (int i = 0; i < hits.length; i++) results[i] = getDetails(hits[i]); return results; } private Hits translateHits(TopDocs topDocs) throws IOException { ScoreDoc[] scoreDocs = topDocs.scoreDocs; int length = scoreDocs.length; Hit[] hits = new Hit[length]; for (int i = 0; i < length; i++) { hits[i] = new Hit(scoreDocs[i].doc, scoreDocs[i].score); } return new Hits(topDocs.totalHits, hits); } }